library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)

RRPLOTS and flchain

odata <- flchain
odata$chapter <- NULL
pander::pander(table(odata$death))
0 1
5705 2169
rownames(odata) <- c(1:nrow(odata))
data <- as.data.frame(model.matrix(Surv(futime,death)~.,odata))

data$`(Intercept)` <- NULL

dataFL <- as.data.frame(cbind(time=odata[rownames(data),"futime"],
                              status=odata[rownames(data),"death"],
                              data))
pander::pander(table(dataFL$status))
0 1
4562 1962
dataFL$time <- dataFL$time/365

Exploring Raw Features with RRPlot

convar <- colnames(dataFL)[lapply(apply(dataFL,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataFL[,c("status",convar)],"status")
pander::pander(topvar)
age kappa lambda creatinine
0 0 0 0
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]

topFeature <- RRPlot(cbind(dataFL$status,dataFL[,topFive[1]]),
                  title=topFive[1])

  par(op)

## With Survival Analysis
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(dataFL$status,dataFL[,topf]),
                  timetoEvent=dataFL$time,
                  atRate=c(0.90,0.80),
                  title=topf)
  idx <- idx + 1
  par(op)
}

names(RRanalysis) <- topFive

Reporting the Metrics

pander::pander(t(RRanalysis[[1]]$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100
Thr 73.000 69.000 69.000 53.000 5.00e+01
RR 4.013 4.399 4.414 5.679 1.00e+00
RR_LCI 3.740 4.045 4.059 4.450 0.00e+00
RR_UCI 4.305 4.783 4.799 7.248 0.00e+00
SEN 0.581 0.713 0.713 0.968 1.00e+00
SPE 0.883 0.790 0.792 0.210 4.38e-04
BACC 0.732 0.752 0.752 0.589 5.00e-01
ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive

pander::pander(ROCAUC)
  est lower upper
age 0.822 0.810 0.833
kappa 0.682 0.667 0.696
lambda 0.665 0.650 0.680
creatinine 0.588 0.572 0.604
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.774 0.775 0.765 0.785
kappa 0.671 0.671 0.658 0.684
lambda 0.657 0.657 0.645 0.669
creatinine 0.584 0.584 0.570 0.598
pander::pander(LogRangp)
age 0.00e+00
kappa 4.90e-175
lambda 4.41e-145
creatinine 2.67e-67
pander::pander(Sensitivity)
  est lower upper
age 0.581 0.558 0.602
kappa 0.319 0.298 0.340
lambda 0.300 0.279 0.321
creatinine 0.288 0.269 0.309
pander::pander(Specificity)
  est lower upper
age 0.883 0.873 0.892
kappa 0.900 0.891 0.908
lambda 0.899 0.890 0.907
creatinine 0.865 0.854 0.875
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe
age 0.822 0.774 0.581 0.883
kappa 0.682 0.671 0.319 0.900
lambda 0.665 0.657 0.300 0.899
creatinine 0.588 0.584 0.288 0.865

Train Test Set

trainsamples <- sample(nrow(dataFL),0.5*nrow(dataFL))
dataFLTrain <- dataFL[trainsamples,]
dataFLTest <- dataFL[-trainsamples,]


pander::pander(table(dataFLTrain$status))
0 1
2314 948
pander::pander(table(dataFLTest$status))
0 1
2248 1014

Cox Modeling

ml <- BSWiMS.model(Surv(time,status)~.,data=dataFLTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower mean upper u.Accuracy r.Accuracy
age 0.1021 0.0736 0.0800 0.0871 0.712 0.610
sexM 0.3717 0.1800 0.3034 0.4458 0.531 0.712
flc.grp 0.0757 0.0216 0.0578 0.0895 0.610 0.712
kappa 0.1495 0.0922 0.1707 0.2978 0.666 0.706
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
age 0.707 0.735 0.632 0.736 0.19568 0.8480
sexM 0.707 0.511 0.740 0.736 0.00729 0.0429
flc.grp 0.707 0.632 0.739 0.736 0.00529 0.3065
kappa 0.707 0.639 0.736 0.736 0.00208 -0.0984
  z.IDI z.NRI Delta.AUC Frequency
age 26.65 25.05 -1.04e-01 1
sexM 5.51 1.12 4.51e-03 1
flc.grp 3.92 8.13 3.27e-03 1
kappa 2.66 -2.57 -2.55e-05 1

Cox Model Performance

The evaluation of the raw Cox model with RRPlot()

timeinterval <- 5

h0 <- sum(dataFLTrain$status & dataFLTrain$time <= timeinterval)
h0 <- h0/sum((dataFLTrain$time > timeinterval) | (dataFLTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.13 5
index <- predict(ml,dataFLTrain)
rdata <- cbind(dataFLTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event

toinclude <- rdata[,1]==1 
obstiemToEvent <- dataFLTrain[,"time"]
summary(obstiemToEvent)

Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000 8.146 11.789 10.043 13.060 14.079

tmin<-min(obstiemToEvent)
if (tmin < 0.01) tmin <- 0.01
tmax<-max(obstiemToEvent)
sum(toinclude)

[1] 948

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
timetoEvent[is.infinite(timetoEvent)] <- 3*tmax
timetoEvent[timetoEvent > 3*tmax] <- 3*tmax
timetoEvent[timetoEvent < tmin] <- tmin

lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.256 0.0104 24.5 3.32e-103
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
948 5.44 0.388 0.388
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)

8.51

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataFLTrain,"status","time")


pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.241 0.695 14.7

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataFLTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event after calibration

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.0995 0.00523 19 1.06e-68
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
948 5.91 0.277 0.276
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- c(MADerror2,mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude])))
pander::pander(MADerror2)

8.51 and 17.09